*********************************************************************************************
* This replication file estimates the production model (Table 6) and the mark-ups (Table 7) *
*********************************************************************************************
*1. Predict output levels based on the demand model. These are used as instruments in the later estimation.
*2. Define output variables and impute input variable where missing.
*3. Estimate the production function and calculate the implied mark-ups.
*4. Store the price, variable costs, fixed costs and transportation costs to be used for counterfactuals and estimation of fixed cost bounds.

*************************
* 0 Set path to files *
*************************
clear all
capture log close
set more off
set trace off
mata: mata set matastrict off
cd $main_directory

//Set predetermined values:
sca scale=10
//Load the data and the commands used in the estimation of the spatial demand model
use "Data_Belgium\Generated_data\baseline_demand_dataset.dta"

qui do "Command_files\spatial_demand_Q2.do"
//Load the parameter estimates from the demand estimation
mata: mata matuse "Estimates\betas_Q", replace
mata: mata matuse "Estimates\betas_Q1", replace
mata: mata matuse "Estimates\betas_Q2", replace

*************************************************************************************************************
* 1 Predict output levels based on the demand model. These are used as instruments in the later estimation. *
*************************************************************************************************************
* 1.1 Predicting total output *

//Calculate the total population on the market
bysort market_ID:  gen dup_market = cond(_N==1,0,_n)
qui su population if dup_market<2
sca sumPopulation=r(sum)
sca di sumPopulation

//Generate a unique numeric identifier for the market.
sort market_ID
egen market_ID_num = group(market_ID)

//TOTAL OUTPUT
//Calculate the total demand for notary transactions on the market
qui su Q if common==1 
sca sumQ=r(sum)
sca di sumQ

//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma_Q=scale*sumQ/sumPopulation
sca di gamma_Q
//Calculate the market size (i.e. number of possible transactions)
gen market_size_Q=gamma_Q*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand_Q=Q 
drop if firm_demand_Q==.
drop if firm_demand_Q<=0

//Define the control variables of the utility function
global controls_utility distance lnNotPerOff start_not LDE Perc_Young Perc_Elderly log_income popdens
//Define the entire set of variables necessary for the estimation of the demand model
global controls_total_Q firm_ID firm_demand_Q market_ID_num market_size_Q $controls_utility


//Demand prediction Q_IV
sort market_ID
mata:
 X    = st_data(., "$controls_total_Q") //This selects the corresponding control variables.
    nx    = rows(X) //This defines the number of market-notary observations
    X    = X,J(nx,1,1) //This generates a constant
	Results=spdemand_log(X,betas_Q) //This estimates the demand model. The first column of this matrix contains the firm_IDs. The second column contains the log of the predicted output of the firm.
	n_firms=rows(Results) //Calculate the number of firms
    st_numscalar("n_firms",n_firms) //Export the number of firms to Stata
	firm_ID    = st_data(., "firm_ID")  //Import the firm IDs, since we will be matching the estimated sales to the firm for which they are estimated
	Prediction=J(nx,1,.) // Make an empty vector to store the predicted output. Since we are still working with the demand dataset, where notaries occur multiple times, we will be loading the results multiple times
	for(i=1; i<=nx; i++) { //i goes over the market-notary combinations
    for(j=1; j<=n_firms; j++) { //j goes over all the firms
	if (firm_ID[i]==Results[j,1]) { //If the firm_ID in the observation of the demand dataset matches the firm_ID in the results matrix from the estimation,
		Prediction[i]=Results[j,2] //then the estimated sales for that notary are saved in the prediction vector in the element of the demand dataset corresponding to the notary.
	}
	}
	}
prediction_exp=exp(Prediction) //Since the prediction variable is in logs, we take the exponent.
//We export the predicted values as Q_IV (for instrumental variable).
st_addvar("double","Q_IV")
st_store(.,"Q_IV",prediction_exp)
end

* 1.2 Predicting real-estate sales (Q1)*
//Calculate the total demand for notary transactions on the market
qui su Q1 if common==1 
sca sumQ1=r(sum)
sca di sumQ1

//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma_Q1=scale*sumQ1/sumPopulation
sca di gamma_Q1
//Calculate the market size (i.e. number of possible transactions)
gen market_size_Q1=gamma_Q1*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand_Q1=Q1
drop if firm_demand_Q1==.
drop if firm_demand_Q1<=0

//Define the entire set of variables necessary for the estimation of the demand model
global controls_total_Q1 firm_ID firm_demand_Q1 market_ID_num market_size_Q1 $controls_utility

//Demand prediction Q1_IV (see "Demand prediction Q_IV" above for an explanation of the process)
sort market_ID
mata:
 X    = st_data(., "$controls_total_Q1")
    nx    = rows(X)
    X    = X,J(nx,1,1)
	Results=spdemand_log(X,betas_Q1)
	n_firms=rows(Results)
    st_numscalar("n_firms",n_firms)
	firm_ID    = st_data(., "firm_ID") 
	Prediction=J(nx,1,.)
	for(i=1; i<=nx; i++) {
    for(j=1; j<=n_firms; j++) {
	if (firm_ID[i]==Results[j,1]) {
		Prediction[i]=Results[j,2]
	}
	}
	}
prediction_exp=exp(Prediction)
st_addvar("double","Q1_IV")
st_store(.,"Q1_IV",prediction_exp)
end

* 1.3 Predicting real-estate sales (Q2)*
//Calculate the total demand for notary transactions on the market
qui su Q2 if common==1 
sca sumQ2=r(sum)
sca di sumQ2

//Calculate total potential demand per capita as 10 times larger than the current value.
scalar gamma_Q2=scale*sumQ2/sumPopulation
sca di gamma_Q2
//Calculate the market size (i.e. number of possible transactions)
gen market_size_Q2=gamma_Q2*population

//Define the product for which demand will be estimated (in this case total transactions).
gen firm_demand_Q2=Q2
drop if firm_demand_Q2==.
drop if firm_demand_Q2<=0

//Define the entire set of variables necessary for the estimation of the demand model
global controls_total_Q2 firm_ID firm_demand_Q2 market_ID_num market_size_Q2 $controls_utility

//Demand prediction Q2_IV (see "Demand prediction Q_IV" above for an explanation of the process)
sort market_ID
mata:
 X    = st_data(., "$controls_total_Q2") 
    nx    = rows(X)
    X    = X,J(nx,1,1)
	Results=spdemand_log(X,betas_Q2)
	n_firms=rows(Results)
    st_numscalar("n_firms",n_firms)
	firm_ID    = st_data(., "firm_ID") 
	Prediction=J(nx,1,.)
	for(i=1; i<=nx; i++) { 
    for(j=1; j<=n_firms; j++) {
	if (firm_ID[i]==Results[j,1]) {
		Prediction[i]=Results[j,2]
	}
	}
	}
prediction_exp=exp(Prediction)
st_addvar("double","Q2_IV")
st_store(.,"Q2_IV",prediction_exp)
end

//Keep only 1 observation per notary (the one corresponding to their home market)
keep if common==1 

***********************************************************************
* 2. Define output variables and impute input variable where missing. *
***********************************************************************
// 2.1 Transaction variables
gen lnQ_IV=log(Q_IV)
gen lnQ1_IV=log(Q1_IV)
gen lnQ2_IV=log(Q2_IV)
gen lnQ=log(Q)
gen lnQ1=log(Q1)
gen lnQ2=log(Q2)

//2.2 Dealing with missing values
replace HoursFTE=. if HoursFTE==0
replace StaffCostFTE=. if StaffCostFTE==0
replace NAddVal=. if NAddVal==0

//Log of relevant variables
gen lnStaffCostFTE=log(StaffCostFTE) //Labor demand measured in staffcosts
gen lnIntDM2=log(IntDM2) //Intermediate inputs based on 2 products
gen lnIntD=log(IntD) //Intermediate inputs based on a single product

***************************************************************************
* 3. Estimate the production function and calculate the implies mark-ups. *
***************************************************************************
* 3.1 Single-product production function *

//Production function estimation
//OLS single product (Column 1 of Table 6)
gmm (eq1:-lnStaffCostFTE + (1/{nu=1})*lnQ - {aL}) ///
    (eq2:-{phi}*lnIntD + (1/{nu=1})*lnQ - {aM}), ///
	instruments(eq1:lnQ) instruments(eq2:lnQ) ///
	winitial(unadjusted, independent) wmatrix(unadjusted)
	est sto OLS_single
//GMM single product (Column 2 of Table 6)
gmm (eq1:-lnStaffCostFTE + (1/{nu=1})*lnQ - {aL}) ///
    (eq2:-{phi}*lnIntD + (1/{nu=1})*lnQ - {aM}), ///
	instruments(eq1:lnQ_IV) instruments(eq2:lnQ_IV) ///
	winitial(unadjusted, independent) wmatrix(unadjusted) 
	est sto GMM_single

sca nu=[nu]_cons
sca phi=[phi]_cons
sca aL=[aL]_cons
sca aM=[aM]_cons

//Input demand functions
gen L_Q=exp(-aL)*(Q_IV^(1/nu))
gen M_Q=exp(-aM/phi)*(Q_IV^(1/(nu*phi)))

//Variable cost and marginal cost functions
gen C_Q=L_Q+M_Q
gen MC_Q=(L_Q+(1/phi)*M_Q)*(1/nu)*(1/Q_IV)
sca p_mean=1.768066600306154035
gen amarkup=(p_mean-MC_Q)
gen markup=(p_mean-MC_Q)/p_mean //model markup
di p_mean
sum MC_Q amarkup markup

* 3.2 Multi-product production function *
//OLS multiple products (Column 3 of Table 6)
gmm (eq1:-lnStaffCostFTE + (1/{nu=1})*ln({alpha}*Q1+(1-{alpha})*Q2) - {aL}) ///
	(eq2:-{phi}*lnIntDM2 + (1/{nu=1})*ln({alpha}*Q1+(1-{alpha})*Q2) - {aM}), ///
	instruments(eq1: lnQ1 lnQ2) instruments(eq2: lnQ1 lnQ2) ///
	winitial(unadjusted, independent) wmatrix(unadjusted)
		est sto OLS_multiple

//GMM multiple products (Column 4 of Table 6)
gmm (eq1:-lnStaffCostFTE + (1/{nu=1})*ln({alpha}*Q1+(1-{alpha})*Q2) - {aL}) ///
	(eq2:-{phi}*lnIntDM2 + (1/{nu=1})*ln({alpha}*Q1+(1-{alpha})*Q2) - {aM}), ///
	instruments(eq1: lnQ1_IV lnQ2_IV) instruments(eq2: lnQ1_IV lnQ2_IV) ///
	winitial(unadjusted, independent) wmatrix(unadjusted)
		est sto GMM_multiple
	
nlcom [alpha]_cons-0.5
nlcom [nu]_cons*[phi]_cons-1

sca nu2=[nu]_cons
mata: nu = st_numscalar("nu2")
sca phi2=[phi]_cons
mata: phi = st_numscalar("phi2")
sca aL2=[aL]_cons
mata: aL = st_numscalar("aL2")
sca aM2=[aM]_cons
mata: aM = st_numscalar("aM2")
sca alpha=[alpha]_cons
mata: alpha = st_numscalar("alpha")

//Input demand functions
gen L2_Q=exp(-aL2)*(alpha*Q1_IV+(1-alpha)*Q2_IV)^(1/nu2)
gen M2_Q=exp(-aM2/phi2)*(alpha*Q1_IV+(1-alpha)*Q2_IV)^(1/(nu2*phi2))

//Variable cost and marginal cost functions
gen C2_Q=L2_Q+M2_Q
gen MC2_Q1=(L2_Q+(1/phi2)*M2_Q)*(alpha/nu2)*(1/(alpha*Q1_IV+(1-alpha)*Q2_IV))
gen MC2_Q2=(L2_Q+(1/phi2)*M2_Q)*((1-alpha)/nu2)*(1/(alpha*Q1_IV+(1-alpha)*Q2_IV))

//Mark-up calculation
sca p_Q1=2.052049204309598807 //Average price for real-estate
sca p_Q2=1.390429979033256602 //Average price for other transactions



gen markup1=(p_Q1-MC2_Q1)/p_Q1 //model markup
gen markup2=(p_Q2-MC2_Q2)/p_Q2 //model markup
gen amarkup1=(p_Q1-MC2_Q1) 
gen amarkup2=(p_Q2-MC2_Q2) 
gen markup_avg=(p_Q1*Q1_IV+p_Q2*Q2_IV-C2_Q)/(p_Q1*Q1_IV+p_Q2*Q2_IV)

//Table 7
sca di p_Q1 p_Q2
sum MC2_Q1 amarkup1 markup1 
sum MC2_Q2 amarkup2 markup2

//Store the price information
mata: p_Q1 = st_numscalar("p_Q1")
mata: p_Q2 = st_numscalar("p_Q2")

//Fixed costs
gen DA=EBITDA-EBIT
gen DAPerNot=DA/NotPerOff
su DAPerNot if DAPerNot>0, de
sca fixed_cost=r(mean)
sca fixed_cost=round(fixed_cost)
mata: fixed_cost = st_numscalar("fixed_cost")

//Transportation costs
mata: transportation_cost=100/1000 //Transportation cost is thousands of Euro.

mata: mata matsave "Estimates/cost_price_parameters" p_Q1 p_Q2 nu phi aL aM alpha fixed_cost transportation_cost, replace

